# Jordan Tam, PhD Thesis research
# University of British Columbia, CA
# Prepared by Tim Waring
# for publication
# 2019.11.04


# Social Learning Game
# Uncertainty calculations and plot


# Number of data points
ndat <- 30

# Social Learning Game choice set
A <- c(60,	180,	60,	60,	140,	130,	190,	60,	90,	90,	120,	50,	80,	190,	110,	130,	200,	150,	110,	100,	220,	150,	190,	170,	100,	130,	170,	70,	90,	170)
B <- c(90,	150,	70,	100,	50,	20,	180,	30,	140,	90,	110,	10,	80,	90,	150,	70,	100,	130,	150,	190,	20,	190,	110,	180,	50,	10,	70,	30,	30,	30)


# Number of repitiions
# Can change this to higher number if needed.
# The code shouldn't require any other changes to accomodate this.
# nrep = 1000 is really slow
nrep <- 1000
# Simulate data using a uniform random distribution

# Produce data set with columns "num", "set", "value"
num <- seq(1,ndat,1)
dataA <- data.frame(num, rep("A",ndat), A)
colnames(dataA) <- c("num","set","value")
dataB <- data.frame(num, rep("B",ndat), B)
colnames(dataB) <- c("num","set","value")

# Combine data into dataframe
data <- rbind(dataA,dataB)
colnames(data) <- c("num","set","value")

# Produce empty data frame to sanmple into
sampled <- data.frame(matrix(0, ncol = 4, nrow = ndat*nrep))
colnames(sampled) <- c("rep","num","set","value")
samp <- c(1,2)

test <- data.frame(matrix(0, ncol = 7, nrow = ndat*nrep))
colnames(test) <- c("rep","num","meanA","meanB","pvalue", "nA","nB")

int <- 0

# For each rep
for(j in 1:nrep)
{
  # For each set of data points
  for(i in 1:ndat)
  {
    # Subset set of data points
    tmp <- data[which(data$num == i),]

    # Randomly select A or B
    x <- sample(samp, size=1)

    # Enter resulting subset information into matrix "sampled"
    sampled[(int+i),2:4] <- tmp[x,]
    sampled[(int+i),1] <- j

    # Subset "sampled" matrix for t-test
    # We want to take t-test with information from num = 1:i
    ABsamp <- seq(1:i)
    sub <- sampled[sampled$num %in% ABsamp,]

    # Make sure to only do t-test for rep = j (max of nrep)
    A <- sub[which(sub$set == 1 & sub$rep == j),]$value # Pull out A
    B <- sub[which(sub$set == 2 & sub$rep == j),]$value # Pull out B

    # If the length of vectors A AND B are greater than 1, then we can do a t-test.
    # Otherwise, we will set the entry to NA
    if(length(A) > 1 & length(B) > 1)
    {
      # Independent 2-group t-test
      ttest <- t.test(A,B)
      # Keep track of rep, num, mean value for A, mean value for B,
      # p-value, lenght of vector A, length of vector B
      test[(int+i),] <- c(j, i, ttest$estimate[1],
                    ttest$estimate[2], ttest$p.value,
                    length(A), length(B))
    }

    # If either the length of vectors A OR B is less than or equal to 1, then set entry to NA
    if(length(A) <= 1 | length(B) <= 1)
    {
      # Set to NA, but keep track of length of vectors A and B
      test[int+i,] <- c(j, i, NA, NA, NA, length(A), length(B))
    }

  }
  # We want to stack the results for all nrep, so change int to int + 30 for next round of results
  int <- int + 30
}


# Mean pvalue at each round
mean_p <- data.frame(matrix(0, ncol = 4, nrow = ndat))
colnames(mean_p) <- c("num","pvalue","lowerCI","upperCI")

for(i in 1:ndat)
{
  # Subset "test" data frame for num = i
  # This should result in length = nrep
  pvalue <- test[which(test$num == i),]$pvalue

  # Set all Nan values to 1 (p-value = 1)
  pvalue[is.na(pvalue)] <- 1

  # Calculate standard deviation for pvalues
  sd_p <- sd(pvalue, na.rm=TRUE)

  # Calculate mean and confidence intervals
  mean_p$num[i] <- i
  mean_p$pvalue[i] <- mean(pvalue, na.rm=TRUE)
  mean_p$lowerCI[i] <- mean(pvalue, na.rm=TRUE) - 1.96*sd_p
  mean_p$upperCI[i] <- mean(pvalue, na.rm=TRUE) + 1.96*sd_p
}


# Set the CIs that fall above one and below zero to 1 and 0 respectively
mean_p$upperCI[mean_p$upperCI>1] <- 1
mean_p$lowerCI[mean_p$lowerCI<0] <- 0

# Plot
require(ggplot2)
p <- ggplot(mean_p, aes(x=num, y=pvalue))
p <- p + geom_line()
p <- p + geom_point()
p <- p + geom_errorbar(aes(ymin=lowerCI, ymax=upperCI))
p <- p + xlab("Round") + ylab("P-value") + ggtitle("Uncertainty in individual learning")
p
